package com.leon.alishop.spider.util;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

import org.htmlparser.Parser;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helper for creating htmlparser {@link Parser} instances from an
 * HTTP URL, with fixed connect/read timeouts and a bounded number of retries.
 */
public class ParserUtil {

	private static final Logger log = LoggerFactory
			.getLogger(ParserUtil.class);

	/** Connect timeout applied to every connection, in milliseconds. */
	public final static int CONNECT_TIMEOUT = 10000;
	/** Read timeout applied to every connection, in milliseconds. */
	public final static int READ_TIMEOUT = 10000;

	/** Maximum number of attempts {@link #newParser(String)} makes before giving up. */
	private static final int MAX_ATTEMPTS = 5;

	/**
	 * Opens an HTTP connection to {@code url} and wraps it in a {@link Parser}.
	 *
	 * <p>Up to {@link #MAX_ATTEMPTS} attempts are made. Every attempt uses a
	 * freshly opened connection: an {@link HttpURLConnection} instance serves a
	 * single request, so retrying on the instance that already failed would
	 * presumably just replay the same failure instead of contacting the server
	 * again. Each failed attempt is logged at WARN level.
	 *
	 * @param url the address of the page to parse
	 * @return a parser bound to the opened connection, or {@code null} if every
	 *         attempt failed with a {@link ParserException}; callers must
	 *         check for {@code null}
	 * @throws IOException if {@code url} is malformed or a connection object
	 *         cannot be created
	 */
	public static Parser newParser(String url) throws IOException {
		URL urlPage = new URL(url);
		for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
			// A new connection per attempt; a failed one is not reused.
			HttpURLConnection conn = (HttpURLConnection) urlPage.openConnection();
			conn.setConnectTimeout(CONNECT_TIMEOUT);
			conn.setReadTimeout(READ_TIMEOUT);
			try {
				return new Parser(conn);
			} catch (ParserException e) {
				log.warn("访问出错"+url,e);
				// Release whatever the failed attempt may still be holding.
				conn.disconnect();
			}
		}
		// All attempts failed; kept as null (not an exception) for existing callers.
		return null;
	}

}
